{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "Python 3.7.1 64-bit ('base': conda)",
   "display_name": "Python 3.7.1 64-bit ('base': conda)",
   "metadata": {
    "interpreter": {
     "hash": "2266c607543d224cb119288ea55888d6fda87cc9a4c78c02ed099d39082a76ce"
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from pdb_profiling import default_config\n",
    "\n",
    "# Folder handed to pdb-profiling's default_config. The path below is specific to the\n",
    "# machine this notebook was written on; it remains the default so behaviour is\n",
    "# unchanged, but it can be overridden without editing the notebook by setting the\n",
    "# PDB_PROFILING_DEMO_DIR environment variable before starting the kernel.\n",
    "DEMO_DIR = os.environ.get(\"PDB_PROFILING_DEMO_DIR\", \"C:/GitWorks/pdb-profiling/test/demo\")\n",
    "\n",
    "default_config(DEMO_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pdb_profiling.processors.i3d.api import Interactome3D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Wall time: 18 ms\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/split/queryProt1=P01848&queryProt2=A0A5B9.xml')"
      ]
     },
     "metadata": {},
     "execution_count": 4
    }
   ],
   "source": [
    "# Time a single 'api/getInteractionStructures' request for queryProt1='P01848' and\n",
    "# queryProt2='A0A5B9'; .result() is called on the object returned by fetch.\n",
    "# When this notebook was last run the displayed value was the path of an .xml file\n",
    "# under the configured folder's interactome3d/split directory.\n",
    "%time Interactome3D.fetch('api/getInteractionStructures', queryProt1='P01848', queryProt2='A0A5B9').result()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pandas import read_csv, concat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "             PROT1   PROT2  RANK_MAJOR  RANK_MINOR       TYPE PDB_ID BIO_UNIT  \\\n",
       "0       A0A0B4JDC9  P91645           1           0  Structure   4z8a        1   \n",
       "1       A0A0B4JDC9  P91645           2           0  Structure   4z89       10   \n",
       "2       A0A0B4JDC9  P91645           3           0  Structure   4z89        3   \n",
       "3       A0A0B4JDC9  P91645           4           0  Structure   4z89        1   \n",
       "4       A0A0B4JDC9  P91645           5           0  Structure   4z89        4   \n",
       "...            ...     ...         ...         ...        ...    ...      ...   \n",
       "236112      Q9N0F3  Q9N0F3           1           0  Structure   1wle        1   \n",
       "236113      Q9TTF5  Q9TTF5           1           0  Structure   3gc6        1   \n",
       "236114      Q9TTF5  Q9TTF5           2           0  Structure   3gc6        2   \n",
       "236115      Q9TTF5  Q9TTF5           3           0  Structure   3gh3        3   \n",
       "236116      Q9TTF5  Q9TTF5           4           0  Structure   3ghh        1   \n",
       "\n",
       "       CHAIN1 MODEL1  SEQ_IDENT1  ...  DOMAIN1  CHAIN2  MODEL2 SEQ_IDENT2  \\\n",
       "0           A      0        98.5  ...        -       B       0      100.0   \n",
       "1           J      0       100.0  ...        -       j       0      100.0   \n",
       "2           C      0       100.0  ...        -       c       0      100.0   \n",
       "3           A      0       100.0  ...        -       a       0      100.0   \n",
       "4           D      0       100.0  ...        -       d       0      100.0   \n",
       "...       ...    ...         ...  ...      ...     ...     ...        ...   \n",
       "236112      B      0       100.0  ...        -       A       0       99.6   \n",
       "236113      B      1        99.6  ...        -       A       0       99.6   \n",
       "236114      B      0        99.6  ...        -       A       0       99.6   \n",
       "236115      B      0       100.0  ...        -       A       0      100.0   \n",
       "236116      B      0        99.6  ...        -       A       0       99.6   \n",
       "\n",
       "       COVERAGE2 SEQ_BEGIN2  SEQ_END2  DOMAIN2  \\\n",
       "0            0.8       1688      1702        -   \n",
       "1            0.6       1689      1700        -   \n",
       "2            0.6       1689      1700        -   \n",
       "3            0.6       1689      1700        -   \n",
       "4            0.6       1689      1700        -   \n",
       "...          ...        ...       ...      ...   \n",
       "236112      90.2         41       507        -   \n",
       "236113      87.1         36       277        -   \n",
       "236114      87.1         36       277        -   \n",
       "236115      86.7         36       276        -   \n",
       "236116      85.6         40       277        -   \n",
       "\n",
       "                                            FILENAME  organism  \n",
       "0        A0A0B4JDC9-P91645-EXP-4z8a.pdb1-A-0-B-0.pdb       fly  \n",
       "1       A0A0B4JDC9-P91645-EXP-4z89.pdb10-J-0-j-0.pdb       fly  \n",
       "2        A0A0B4JDC9-P91645-EXP-4z89.pdb3-C-0-c-0.pdb       fly  \n",
       "3        A0A0B4JDC9-P91645-EXP-4z89.pdb1-A-0-a-0.pdb       fly  \n",
       "4        A0A0B4JDC9-P91645-EXP-4z89.pdb4-D-0-d-0.pdb       fly  \n",
       "...                                              ...       ...  \n",
       "236112       Q9N0F3-Q9N0F3-EXP-1wle.pdb1-B-0-A-0.pdb   btaurus  \n",
       "236113       Q9TTF5-Q9TTF5-EXP-3gc6.pdb1-B-1-A-0.pdb   btaurus  \n",
       "236114       Q9TTF5-Q9TTF5-EXP-3gc6.pdb2-B-0-A-0.pdb   btaurus  \n",
       "236115       Q9TTF5-Q9TTF5-EXP-3gh3.pdb3-B-0-A-0.pdb   btaurus  \n",
       "236116       Q9TTF5-Q9TTF5-EXP-3ghh.pdb1-B-0-A-0.pdb   btaurus  \n",
       "\n",
       "[236117 rows x 23 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>PROT1</th>\n      <th>PROT2</th>\n      <th>RANK_MAJOR</th>\n      <th>RANK_MINOR</th>\n      <th>TYPE</th>\n      <th>PDB_ID</th>\n      <th>BIO_UNIT</th>\n      <th>CHAIN1</th>\n      <th>MODEL1</th>\n      <th>SEQ_IDENT1</th>\n      <th>...</th>\n      <th>DOMAIN1</th>\n      <th>CHAIN2</th>\n      <th>MODEL2</th>\n      <th>SEQ_IDENT2</th>\n      <th>COVERAGE2</th>\n      <th>SEQ_BEGIN2</th>\n      <th>SEQ_END2</th>\n      <th>DOMAIN2</th>\n      <th>FILENAME</th>\n      <th>organism</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>A0A0B4JDC9</td>\n      <td>P91645</td>\n      <td>1</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>4z8a</td>\n      <td>1</td>\n      <td>A</td>\n      <td>0</td>\n      <td>98.5</td>\n      <td>...</td>\n      <td>-</td>\n      <td>B</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>0.8</td>\n      <td>1688</td>\n      <td>1702</td>\n      <td>-</td>\n      <td>A0A0B4JDC9-P91645-EXP-4z8a.pdb1-A-0-B-0.pdb</td>\n      <td>fly</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>A0A0B4JDC9</td>\n      <td>P91645</td>\n      <td>2</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>4z89</td>\n      <td>10</td>\n      <td>J</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>...</td>\n      <td>-</td>\n      <td>j</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>0.6</td>\n      <td>1689</td>\n      <td>1700</td>\n      <td>-</td>\n      <td>A0A0B4JDC9-P91645-EXP-4z89.pdb10-J-0-j-0.pdb</td>\n      <td>fly</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>A0A0B4JDC9</td>\n      
<td>P91645</td>\n      <td>3</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>4z89</td>\n      <td>3</td>\n      <td>C</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>...</td>\n      <td>-</td>\n      <td>c</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>0.6</td>\n      <td>1689</td>\n      <td>1700</td>\n      <td>-</td>\n      <td>A0A0B4JDC9-P91645-EXP-4z89.pdb3-C-0-c-0.pdb</td>\n      <td>fly</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>A0A0B4JDC9</td>\n      <td>P91645</td>\n      <td>4</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>4z89</td>\n      <td>1</td>\n      <td>A</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>...</td>\n      <td>-</td>\n      <td>a</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>0.6</td>\n      <td>1689</td>\n      <td>1700</td>\n      <td>-</td>\n      <td>A0A0B4JDC9-P91645-EXP-4z89.pdb1-A-0-a-0.pdb</td>\n      <td>fly</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>A0A0B4JDC9</td>\n      <td>P91645</td>\n      <td>5</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>4z89</td>\n      <td>4</td>\n      <td>D</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>...</td>\n      <td>-</td>\n      <td>d</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>0.6</td>\n      <td>1689</td>\n      <td>1700</td>\n      <td>-</td>\n      <td>A0A0B4JDC9-P91645-EXP-4z89.pdb4-D-0-d-0.pdb</td>\n      <td>fly</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>236112</th>\n      <td>Q9N0F3</td>\n      <td>Q9N0F3</td>\n      
<td>1</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>1wle</td>\n      <td>1</td>\n      <td>B</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>...</td>\n      <td>-</td>\n      <td>A</td>\n      <td>0</td>\n      <td>99.6</td>\n      <td>90.2</td>\n      <td>41</td>\n      <td>507</td>\n      <td>-</td>\n      <td>Q9N0F3-Q9N0F3-EXP-1wle.pdb1-B-0-A-0.pdb</td>\n      <td>btaurus</td>\n    </tr>\n    <tr>\n      <th>236113</th>\n      <td>Q9TTF5</td>\n      <td>Q9TTF5</td>\n      <td>1</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>3gc6</td>\n      <td>1</td>\n      <td>B</td>\n      <td>1</td>\n      <td>99.6</td>\n      <td>...</td>\n      <td>-</td>\n      <td>A</td>\n      <td>0</td>\n      <td>99.6</td>\n      <td>87.1</td>\n      <td>36</td>\n      <td>277</td>\n      <td>-</td>\n      <td>Q9TTF5-Q9TTF5-EXP-3gc6.pdb1-B-1-A-0.pdb</td>\n      <td>btaurus</td>\n    </tr>\n    <tr>\n      <th>236114</th>\n      <td>Q9TTF5</td>\n      <td>Q9TTF5</td>\n      <td>2</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>3gc6</td>\n      <td>2</td>\n      <td>B</td>\n      <td>0</td>\n      <td>99.6</td>\n      <td>...</td>\n      <td>-</td>\n      <td>A</td>\n      <td>0</td>\n      <td>99.6</td>\n      <td>87.1</td>\n      <td>36</td>\n      <td>277</td>\n      <td>-</td>\n      <td>Q9TTF5-Q9TTF5-EXP-3gc6.pdb2-B-0-A-0.pdb</td>\n      <td>btaurus</td>\n    </tr>\n    <tr>\n      <th>236115</th>\n      <td>Q9TTF5</td>\n      <td>Q9TTF5</td>\n      <td>3</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>3gh3</td>\n      <td>3</td>\n      <td>B</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>...</td>\n      <td>-</td>\n      <td>A</td>\n      <td>0</td>\n      <td>100.0</td>\n      <td>86.7</td>\n      <td>36</td>\n      <td>276</td>\n      <td>-</td>\n      <td>Q9TTF5-Q9TTF5-EXP-3gh3.pdb3-B-0-A-0.pdb</td>\n      <td>btaurus</td>\n    </tr>\n    <tr>\n      <th>236116</th>\n      <td>Q9TTF5</td>\n      
<td>Q9TTF5</td>\n      <td>4</td>\n      <td>0</td>\n      <td>Structure</td>\n      <td>3ghh</td>\n      <td>1</td>\n      <td>B</td>\n      <td>0</td>\n      <td>99.6</td>\n      <td>...</td>\n      <td>-</td>\n      <td>A</td>\n      <td>0</td>\n      <td>99.6</td>\n      <td>85.6</td>\n      <td>40</td>\n      <td>277</td>\n      <td>-</td>\n      <td>Q9TTF5-Q9TTF5-EXP-3ghh.pdb1-B-0-A-0.pdb</td>\n      <td>btaurus</td>\n    </tr>\n  </tbody>\n</table>\n<p>236117 rows × 23 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "source": [
    "# Build one table of 'Structure' rows across every organism known to Interactome3D.\n",
    "# For each organism: read its tab-separated metadata, keep only the rows whose TYPE\n",
    "# equals 'Structure', then tag the surviving rows with the organism name.\n",
    "# Filtering before tagging avoids writing the new column onto rows that are dropped.\n",
    "structure_frames = [\n",
    "    read_csv(Interactome3D.retrieve_metadata(org), sep='\\t')\n",
    "    .loc[lambda table: table.TYPE.eq('Structure')]\n",
    "    .assign(organism=org)\n",
    "    for org in Interactome3D.organisms\n",
    "]\n",
    "\n",
    "# `dfs` is bound exactly once, to the combined DataFrame (it is never a list).\n",
    "# ignore_index=True gives the combined frame a fresh 0..n-1 index.\n",
    "dfs = concat(structure_frames, sort=False, ignore_index=True)\n",
    "dfs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "100%|██████████| 18/18 [00:01<00:00, 12.10it/s]\n",
      "2020-10-16 16:48:33,637 PDB-Profiling INFO 18 chunks downloaded in 1.50s\n",
      "Wall time: 1.51 s\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "[WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/fly_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/mouse_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/tpallidum_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/mtuberculosis_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/btaurus_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/rat_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/spombe_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/yeast_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/human_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/athaliana_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/ecoli_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/bsubtilis_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/pfalciparum_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/ssp_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/mpneumoniae_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/celegans_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/cjejuni_complete_interactions.tsv'),\n",
       " WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/hpylori_complete_interactions.tsv')]"
      ]
     },
     "metadata": {},
     "execution_count": 7
    }
   ],
   "source": [
    "# Time Interactome3D.retrieve_all_meta(). When this notebook was last run the\n",
    "# displayed value was a list of 18 local paths, one '<organism>_complete_interactions.tsv'\n",
    "# file per organism, under the configured folder's interactome3d directory.\n",
    "%time Interactome3D.retrieve_all_meta()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Wall time: 9.97 ms\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "WindowsPath('C:/GitWorks/pdb-profiling/test/demo/interactome3d/split/P12975-P19569-EXP-4kt0.pdb1-E-0-D-0.pdb')"
      ]
     },
     "metadata": {},
     "execution_count": 8
    }
   ],
   "source": [
    "# Time a single 'api/getPdbFile' request for the named interaction file\n",
    "# (type='interaction'); .result() is called on the object returned by fetch.\n",
    "# When this notebook was last run the displayed value was the path of the .pdb file\n",
    "# under the configured folder's interactome3d/split directory.\n",
    "%time Interactome3D.fetch('api/getPdbFile', filename='P12975-P19569-EXP-4kt0.pdb1-E-0-D-0.pdb', type='interaction').result()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}